STAA 566 Will Paces Homework 2

I felt as though a dynamic plot would be a good vehicle for displaying Colorado’s covid case data - the ability to see both the general trends across counties as well as selecting a specific county for greater detail helps to draw comparisons between regions. This data was sourced from the New York Times’ github repository. I restriced this investigation to the 15 largest (most populated) counties.

# Load libraries
library(ggplot2)
library(htmlwidgets)
library(plotly)
library(tidyr)
library(dplyr)
library(readr)
# Download Covid Data ----
cases_data <- read.csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv", 
                       stringsAsFactors = F)

# Subset Colorado data
co.data <- cases_data[cases_data$state == 'Colorado', ]

# Generate weekly averages
co.rolling.cases <- co.data %>%
  arrange(date) %>%
  mutate(cases_1day  = cases  - dplyr::lag(cases,1),
         deaths_1day = deaths - dplyr::lag(deaths,1),
         cases_7day  = zoo::rollmean(cases_1day, 7, fill=NA, align="right"),
         deaths_7day = zoo::rollmean(deaths_1day, 7, fill=NA, align="right"))

# 15 largest CO counties (by population)
largest.counties <- c('El Paso',
                      'Denver',
                      'Arapahoe',
                      'Jefferson',
                      'Adams',
                      'Larimer',
                      'Douglas',
                      'Boulder',
                      'Weld',
                      'Pueblo',
                      'Mesa',
                      'Broomfield',
                      'Garfield',
                      'Eagle',
                      'La Plata')

co.rolling.cases <- co.rolling.cases[co.rolling.cases$county %in% largest.counties, ]
co.rolling.cases$county <- factor(co.rolling.cases$county, levels = largest.counties)

# Pivot to long-format data and separate cumulative from weekly average data
all.co.long <- co.rolling.cases %>% 
  select(date, county, cases, deaths) %>%
  pivot_longer(c('cases', 'deaths'), names_to = "type", values_to = 'Count') %>%
  drop_na()
all.co.long$time <- rep('Cumulative', times = nrow(all.co.long))

week.co.long <- co.rolling.cases %>%
  select(date, county, cases_7day, deaths_7day) %>%
  pivot_longer(c('cases_7day', 'deaths_7day'), names_to = "type", values_to = 'Count') %>%
  drop_na()
week.co.long$type <- gsub("_7day", '', week.co.long$type)
week.co.long$time <- rep('Weekly Average', times = nrow(week.co.long))

day.co.long <- co.rolling.cases %>%
  select(date, county, cases_1day, deaths_1day) %>%
  pivot_longer(c('cases_1day', 'deaths_1day'), names_to = "type", values_to = 'Count') %>%
  drop_na()
day.co.long$type <- gsub("_1day", '', day.co.long$type)
day.co.long$time <- rep('Daily Change', times = nrow(day.co.long))

# Combine data
co.long <- rbind(all.co.long, week.co.long, day.co.long)

# Ensure date is in correct format and specify time factor order
co.long$date <- as.Date(co.long$date)
co.long$time <- factor(co.long$time, levels = c('Cumulative', 'Weekly Average', 'Daily Change'))
co.long$type <- ifelse(co.long$type == 'cases', 'Cases', 'Deaths')

# Generate figure with ggplot
co.ggp <- ggplot(co.long, aes(x = date, y = Count)) + 
  geom_line(aes(color = county)) +
  theme_minimal(base_size = 12) +
  facet_grid(type ~ time, scales = 'free_y') +
  scale_x_date(breaks = "1 year", 
               minor_breaks = "6 months", 
               date_labels = "%Y",
               limits=c(as.Date("2020-01-01"), NA)) +
  scale_color_viridis_d() +
  labs(color = 'County')


# Convert to Plotly figures
co.ptly <- ggplotly(co.ggp) %>%
  layout(autosize = T)

Colorado Covid Dashboard